# -*- coding: utf-8 -*-
import requests,time,re
import threadpool
import json,os,redis
import xlwt,xlrd,random
import urllib.request
from lxml import etree


url = 'https://mm.taobao.com/search_tstar_model.htm?spm=5679.126488.640745.2.38a457c5mEWpPl&style=&place=city%3A%E5%B9%BF%E5%B7%9E'
url1='https://mm.taobao.com/tstar/search/tstar_model.do?_input_charset=utf-8'


class tn(object):
    """Scraper for Taobao "淘女郎" model listings.

    Fetches paginated model search results, downloads every photo from each
    model's public albums to a local directory tree, and can export model
    records to an Excel sheet.
    """

    # NOTE(review): the hard-coded session cookie will expire; once it does,
    # the endpoints will start returning error pages — refresh it manually.
    headers = {
        'user-agent': 'Mozilla/5.0 (Windows NT 6.1) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/61.0.3163.100 Safari/537.36',
        'cookie': 'thw=cn; hng=CN%7Czh-CN%7CCNY%7C156; x=e%3D1%26p%3D*%26s%3D0%26c%3D0%26f%3D0%26g%3D0%26t%3D0%26__ll%3D-1%26_ato%3D0; linezing_session=dTr7U8KOxddDd4CJ4VFihDPH_1506516599348AOqB_1; v=0; _tb_token_=5b9375847e363; _m_h5_tk=4efc0ba8d72376fa1968a3f0a92f0eef_1506518851245; _m_h5_tk_enc=e10a67c79b8a47f97dd3134779acfdfe; uc3=sg2=URsQfTD%2BFY9mkKOl%2FNBXqNFPPUNKq8HjGx%2Bair7O99U%3D&nk2=UoCKEw%2B1myb2u1mo&id2=UoCJiFOLhjN6OQ%3D%3D&vt3=F8dBzWk7FANQZ7%2B830Y%3D&lg2=U%2BGCWk%2F75gdr5Q%3D%3D; existShop=MTUwNjk0NjQwMQ%3D%3D; uss=VFcwj3YmzKmO6xkbkJFH%2FN%2FOd2CPNJzRBWBygIM3IKKXIgbm1DSeGb87; lgc=1132771621aa; tracknick=1132771621aa; cookie2=11aae79de97ae344158e4aa965c7003c; sg=a2d; cookie1=Aihx9FxoyUYIE7uEPgeqstl%2B5uvfGslyiCQ%2FpePYriI%3D; unb=1100473042; skt=11ea4b0360e50e08; t=b63e6968872da200706b694d67c62883; _cc_=UtASsssmfA%3D%3D; tg=0; _l_g_=Ug%3D%3D; _nk_=1132771621aa; cookie17=UoCJiFOLhjN6OQ%3D%3D; cna=1/K/EZz4HDECAXhVTdivCBle; mt=ci=45_1; isg=Anx8i770Cd1_Zz2Ede24lLr7TRqCdCGCcQXP_1b95GdKIR2rfoTZL77ZdX-i; JSESSIONID=F34B74BB5A7A0A1BF96E8B3F2C02DE87; uc1=cookie14=UoTcCfmfxB%2Fd7g%3D%3D&lng=zh_CN&cookie16=WqG3DMC9UpAPBHGz5QBErFxlCA%3D%3D&existShop=false&cookie21=U%2BGCWk%2F7owY3i1vB1W2BgQ%3D%3D&tag=8&cookie15=UtASsssmOIJ0bQ%3D%3D&pas=0',

    }

    def getUrlinfo(self, page, max_retries=5):
        """Fetch one page of the model search listing.

        :param page: page number of the search listing.
        :param max_retries: how many times to retry the POST before giving up
                            (new optional parameter; default preserves a
                            bounded version of the old retry behavior).
        :return: list of (realName, height, weight, city, userId) tuples.
        :raises RuntimeError: if no successful (HTTP 200) response is obtained.
        """
        datas = []
        pageurl = 'https://mm.taobao.com/tstar/search/tstar_model.do?_input_charset=utf-8'

        data = {
            'q': '',
            'viewFlag': 'A',
            'sortType': 'default',
            'searchStyle': '',
            'searchRegion': 'city:',
            'searchFansNum': '',
            'currentPage': '%s' % (page),
            'pageSize': '100'
        }

        # The original looped forever on non-200 responses, and if the very
        # first request raised, `reqs` was unbound and json.loads() below
        # crashed with NameError.  Retry a bounded number of times instead.
        reqs = None
        for _ in range(max_retries):
            time.sleep(1)  # be polite between attempts
            try:
                reqs = requests.post(pageurl, data=data, headers=self.headers, timeout=5)
            except Exception as e:
                print('error:', e)
                continue
            if reqs.status_code == 200:
                break
            print('failed')  # original printed the typo 'field'
        else:
            raise RuntimeError('could not fetch page %s after %s attempts' % (page, max_retries))

        dictx = json.loads(str(reqs.text))
        for model in dictx['data']['searchDOList']:
            datas.append((model['realName'], model['height'], model['weight'],
                          model['city'], model['userId']))
        return datas  # model records for this page

    def getImages(self, rs):
        """Download all album photos for each model record in *rs*.

        :param rs: iterable of (realName, height, weight, city, userId)
                   tuples as produced by getUrlinfo().
        Side effects: creates per-model directories under the hard-coded img
        root, appends visited URLs to idurl.txt / imges.txt / im.txt, and
        saves each photo as <counter>.jpg.
        """
        a = 0  # global photo counter, used to name downloaded files
        for id in rs:
            # exist_ok: re-running the spider must not crash on directories
            # created by a previous run (original os.mkdir raised).
            os.makedirs(os.path.join('D:\\SpiderProject\\ZhiHu\\taonvlang\\img', str(id[0])), exist_ok=True)

            # Keep the request URL clean: the original baked a trailing '\n'
            # into the URL (meant only for the log file) and then sent that
            # newline to the server too.
            imagurl = 'https://mm.taobao.com/self/album/open_album_list.htm?_charset=utf-8&user_id%20=' + str(id[4])
            try:
                with open('idurl.txt', 'a+') as f:
                    f.write(imagurl + '\n')
            except Exception as e:
                print('error:', e)

            try:
                html1 = requests.get(imagurl, headers=self.headers, timeout=3)
                reqsones = str(html1.text)
            except Exception as e:
                # Original fell through here and crashed with NameError on
                # the unbound `reqsones`; skip this model instead.
                print('error:', e)
                continue

            urls = etree.HTML(reqsones)
            imagesurl = urls.xpath('//a[@class="mm-first"]/@href')  # album page links
            # Album ids are long digit runs; the original alternation
            # (11|8|9 digits) silently missed 10-digit ids.
            album_id = re.compile(r'album_id=\d{8,11}')
            result = album_id.findall(str(imagesurl))

            for im in result:
                pturl = 'https://mm.taobao.com/album/json/get_album_photo_list.htm?user_id=%s&%s&top_pic_id=0&page=0' % (id[4], im)
                try:
                    with open('imges.txt', 'a+') as file:
                        file.write(pturl + '\n')
                except Exception as e:
                    print('error:', e)

                time.sleep(1)  # throttle album requests
                try:
                    html5 = requests.get(pturl, headers=self.headers, timeout=3)  # fetch JSON photo list
                except Exception as e:
                    print('Error:', e)
                    continue  # original continued with `html5` unbound
                print('获取json图片URL')
                jsons = json.loads(str(html5.text))
                try:
                    pic = jsons['picList']
                except KeyError as e:
                    print('Error:', e)
                    continue  # original continued with `pic` possibly unbound

                for ius in pic:
                    a += 1
                    iu = ius['picUrl']
                    imurl = 'http:' + str(iu)  # picUrl is protocol-relative
                    try:
                        with open('im.txt', 'a+') as fs:
                            fs.write(imurl + '\n')
                    except Exception as e:
                        print('error:', e)
                    filename = os.path.join('D:\\SpiderProject\\ZhiHu\\taonvlang\\img\\%s' % (id[0]), str(a) + '.jpg')
                    try:
                        # Original passed the '\n'-suffixed URL to
                        # urlretrieve and printed success before trying.
                        urllib.request.urlretrieve(imurl, filename)
                        print('下载图片成功.....')
                    except Exception as e:
                        print('Error:', e)

    def getInfophone(self, userId='277949921'):
        """Scrape the contact block from one model's profile page.

        :param userId: profile user id (new optional parameter; the default
                       preserves the originally hard-coded profile).
        :return: list of lxml results matched by the contact-info XPath.
        """
        userurl = 'https://mm.taobao.com/self/aiShow.htm?spm=719.7763510.1998643336.1.6WJFuT&userId=' + str(userId)
        html = requests.get(userurl, headers=self.headers, timeout=5)
        html.encoding = 'GBK'  # profile pages are GBK-encoded
        print(html.encoding)
        selector = etree.HTML(str(html.text))
        phone = selector.xpath('//strong[@style="font-family: simhei;color: #000000;font-size: 24.0px;line-height: 1.5;"]|//span[@style="font-size: 24.0px;"]/text()')
        return phone

    def saveInfo(self, p):
        """Write model records to Excel_Workbook.xls, one row per model.

        :param p: iterable of (realName, height, weight, city, ...) tuples.
        """
        workbook = xlwt.Workbook(encoding='ascii')
        worksheet = workbook.add_sheet('My Worksheet')
        # The original wrote the last four headers all into cell (0,3);
        # xlwt raises on duplicate cell writes.  Give each its own column.
        for col, label in enumerate(('姓名', '身高', '体重', '城市', 'QQ', '电话号码', '微信')):
            worksheet.write(0, col, label=label)

        # The original `while a<=30 or b<=5` never updated b (infinite loop)
        # and wrote every record into the same cell (3,5).
        for row, names in enumerate(p, start=1):
            worksheet.write(row, 0, label=str(names[0]))  # name
            worksheet.write(row, 1, label=str(names[1]))  # height
            worksheet.write(row, 2, label=str(names[2]))  # weight
            worksheet.write(row, 3, label=str(names[3]))  # city

        workbook.save('Excel_Workbook.xls')

    def saveRedis(self):
        """Open and return a connection to the local Redis instance.

        Stub: the original created the connection, never used it, and
        returned None.  Returning it makes the method usable by callers.
        """
        # `redis` is already imported at module level; no local import needed.
        return redis.Redis(host='127.0.0.1', port=6379)




if __name__ == "__main__":
    # Crawl the listing page by page and download each model's album photos.
    spider = tn()
    for page in range(1):
        model_rows = spider.getUrlinfo(page)
        print('正在爬取淘女郎模特第%s页.....' % page)
        spider.getImages(model_rows)

